From 31dab8179d87925789a8fca638f7d457045f4abf Mon Sep 17 00:00:00 2001 From: Seunghwan Hong Date: Fri, 30 Sep 2022 15:17:45 +0900 Subject: [PATCH] [BEAM-10785] Change RowAsDictJsonCoder to not ensure ASCII while encoding (#22312) * Change RowAsDictJsonCoder to not ensure ASCII while encoding Signed-off-by: Seunghwan Hong * Format code, Refactor test for readability Signed-off-by: Seunghwan Hong Signed-off-by: Seunghwan Hong Co-authored-by: Pablo --- CHANGES.md | 1 + sdks/python/apache_beam/io/gcp/bigquery_tools.py | 5 ++++- sdks/python/apache_beam/io/gcp/bigquery_tools_test.py | 7 +++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 8673b4010059..87d3d685eb31 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -61,6 +61,7 @@ * Support for X source added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). * Decreased TextSource CPU utilization by 2.3x (Java) ([#23193](https://github.com/apache/beam/issues/23193)). * Fixed bug when using SpannerIO with RuntimeValueProvider options (Java) ([#22146](https://github.com/apache/beam/issues/22146)). +* Fixed issue for unicode rendering on WriteToBigQuery ([#10785](https://github.com/apache/beam/issues/10785)) ## New Features / Improvements diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_tools.py index b5e50c1d42ad..27428aca5335 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_tools.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_tools.py @@ -1538,7 +1538,10 @@ def encode(self, table_row): # to the programmer that they have used NAN/INF values. try: return json.dumps( - table_row, allow_nan=False, default=default_encoder).encode('utf-8') + table_row, + allow_nan=False, + ensure_ascii=False, + default=default_encoder).encode('utf-8') except ValueError as e: raise ValueError( '%s. %s. Row: %r' % (e, JSON_COMPLIANCE_ERROR, table_row)) diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py b/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py index 8c26e75a1ff4..2ee4f374497d 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py @@ -1052,6 +1052,13 @@ def test_invalid_json_inf(self): def test_invalid_json_neg_inf(self): self.json_compliance_exception(float('-inf')) + def test_ensure_ascii(self): + coder = RowAsDictJsonCoder() + test_value = {'s': '🎉'} + output_value = b'{"s": "\xf0\x9f\x8e\x89"}' + + self.assertEqual(output_value, coder.encode(test_value)) + @unittest.skipIf(HttpError is None, 'GCP dependencies are not installed') class TestJsonRowWriter(unittest.TestCase):