Skip to content

Commit e5ef16e

Browse files
committed
fix: Reconfigure the encoding of standard input according to the --encoding option, closes #1038
1 parent 12be2ff commit e5ef16e

14 files changed

+55
-51
lines changed

CHANGELOG.rst

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ Unreleased
1010
* :doc:`/scripts/csvstat` adds a :code:`--non-nulls` option to only output counts of non-null values.
1111
* :doc:`/scripts/csvstat` adds a :code:`--max-precision` option to only output the maximum number of decimal places.
1212
* feat: Add a :code:`--null-value` option to commands with the :code:`--blanks` option, to convert additional values to NULL.
13+
* fix: Reconfigure the encoding of standard input according to the :code:`--encoding` option, which defaults to ``utf-8-sig``. Affected users no longer need to set the ``PYTHONIOENCODING`` environment variable.
1314
* fix: Prompt the user if additional input is expected (i.e. if no input file or piped data is provided) in :doc:`/scripts/csvjoin`, :doc:`/scripts/csvsql` and :doc:`/scripts/csvstack`.
1415
* fix: No longer errors if a NUL byte occurs in an input file.
1516
* Add Python 3.12 support.

csvkit/cli.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -233,11 +233,14 @@ def _init_common_parser(self):
233233
'-V', '--version', action='version', version='%(prog)s 1.2.0',
234234
help='Display version information and exit.')
235235

236-
def _open_input_file(self, path):
236+
def _open_input_file(self, path, opened=False):
237237
"""
238238
Open the input file specified on the command line.
239239
"""
240240
if not path or path == '-':
241+
# "UnsupportedOperation: It is not possible to set the encoding or newline of stream after the first read"
242+
if not opened:
243+
sys.stdin.reconfigure(encoding=self.args.encoding)
241244
f = sys.stdin
242245
else:
243246
extension = splitext(path)[1]

csvkit/utilities/csvstack.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def main(self):
108108
output.writerow(headers)
109109

110110
for i, path in enumerate(self.args.input_paths):
111-
f = self._open_input_file(path)
111+
f = self._open_input_file(path, opened=True)
112112
file_is_stdin = path == '-'
113113

114114
if has_groups:

tests/test_convert/test_fixed.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from io import StringIO
1+
import io
22

33
from csvkit.convert import fixed
44
from csvkit.utilities.in2csv import In2CSV
@@ -23,7 +23,7 @@ def test_fixed_skip_lines(self):
2323
self.assertEqual(f.read(), output)
2424

2525
def test_fixed_no_inference(self):
26-
input_file = StringIO(' 1 2 3')
26+
input_file = io.BytesIO(b' 1 2 3')
2727

2828
with stdin_as_string(input_file):
2929
self.assertLines(['--no-inference', '-f', 'fixed', '--schema',
@@ -36,7 +36,7 @@ def test_fixed_no_inference(self):
3636

3737
def test_fixed_streaming(self):
3838
with open('examples/testfixed') as f, open('examples/testfixed_schema.csv') as schema:
39-
output_file = StringIO()
39+
output_file = io.StringIO()
4040
fixed.fixed2csv(f, schema, output=output_file)
4141
output = output_file.getvalue()
4242
output_file.close()
@@ -91,7 +91,7 @@ def test_schematic_line_parser(self):
9191
bar,6,2
9292
baz,8,5"""
9393

94-
f = StringIO(schema)
94+
f = io.StringIO(schema)
9595
parser = fixed.FixedWidthRowParser(f)
9696
f.close()
9797

tests/test_utilities/test_csvclean.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1+
import io
12
import os
23
import sys
3-
from io import StringIO
44
from unittest.mock import patch
55

66
from csvkit.utilities.csvclean import CSVClean, launch_new_instance
@@ -17,7 +17,7 @@ def tearDown(self):
1717

1818
def assertCleaned(self, basename, output_lines, error_lines, additional_args=[]):
1919
args = [f'examples/{basename}.csv'] + additional_args
20-
output_file = StringIO()
20+
output_file = io.StringIO()
2121

2222
utility = CSVClean(args, output_file)
2323
utility.run()

tests/test_utilities/test_csvformat.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1+
import io
12
import sys
2-
from io import StringIO
33
from unittest.mock import patch
44

55
from csvkit.utilities.csvformat import CSVFormat, launch_new_instance
@@ -54,7 +54,7 @@ def test_tab_delimiter(self):
5454
])
5555

5656
def test_quotechar(self):
57-
input_file = StringIO('a,b,c\n1*2,3,4\n')
57+
input_file = io.BytesIO(b'a,b,c\n1*2,3,4\n')
5858

5959
with stdin_as_string(input_file):
6060
self.assertLines(['-Q', '*'], [
@@ -65,7 +65,7 @@ def test_quotechar(self):
6565
input_file.close()
6666

6767
def test_doublequote(self):
68-
input_file = StringIO('a\n"a ""quoted"" string"')
68+
input_file = io.BytesIO(b'a\n"a ""quoted"" string"')
6969

7070
with stdin_as_string(input_file):
7171
self.assertLines(['-P', '#', '-B'], [
@@ -76,7 +76,7 @@ def test_doublequote(self):
7676
input_file.close()
7777

7878
def test_escapechar(self):
79-
input_file = StringIO('a,b,c\n1"2,3,4\n')
79+
input_file = io.BytesIO(b'a,b,c\n1"2,3,4\n')
8080

8181
with stdin_as_string(input_file):
8282
self.assertLines(['-P', '#', '-U', '3'], [

tests/test_utilities/test_csvjson.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1+
import io
12
import json
23
import sys
3-
from io import StringIO
44
from unittest.mock import patch
55

66
from csvkit.utilities.csvjson import CSVJSON, launch_new_instance
@@ -58,7 +58,7 @@ def test_keying(self):
5858
self.assertDictEqual(js, {'True': {'a': True, 'c': 3.0, 'b': 2.0}})
5959

6060
def test_duplicate_keys(self):
61-
output_file = StringIO()
61+
output_file = io.StringIO()
6262
utility = CSVJSON(['-k', 'a', 'examples/dummy3.csv'], output_file)
6363
self.assertRaisesRegex(ValueError,
6464
'Value True is not unique in the key column.',

tests/test_utilities/test_csvlook.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1+
import io
12
import sys
2-
from io import StringIO
33
from unittest.mock import patch
44

55
from csvkit.utilities.csvlook import CSVLook, launch_new_instance
@@ -127,7 +127,7 @@ def test_max_column_width(self):
127127
])
128128

129129
def test_stdin(self):
130-
input_file = StringIO('a,b,c\n1,2,3\n4,5,6\n')
130+
input_file = io.BytesIO(b'a,b,c\n1,2,3\n4,5,6\n')
131131

132132
with stdin_as_string(input_file):
133133
self.assertLines([], [

tests/test_utilities/test_csvsort.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1+
import io
12
import sys
2-
from io import StringIO
33
from unittest.mock import patch
44

55
from csvkit.utilities.csvsort import CSVSort, launch_new_instance
@@ -78,7 +78,7 @@ def test_sort_t_and_nulls(self):
7878
self.assertEqual(test_order, new_order)
7979

8080
def test_stdin(self):
81-
input_file = StringIO('a,b,c\n4,5,6\n1,2,3\n')
81+
input_file = io.BytesIO(b'a,b,c\n4,5,6\n1,2,3\n')
8282

8383
with stdin_as_string(input_file):
8484
self.assertLines([], [

tests/test_utilities/test_csvsql.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1+
import io
12
import os
23
import sys
3-
from io import StringIO
44
from textwrap import dedent
55
from unittest.mock import patch
66

@@ -108,7 +108,7 @@ def test_linenumbers(self):
108108
''')) # noqa: W291
109109

110110
def test_stdin(self):
111-
input_file = StringIO('a,b,c\n4,2,3\n')
111+
input_file = io.BytesIO(b'a,b,c\n4,2,3\n')
112112

113113
with stdin_as_string(input_file):
114114
sql = self.get_output(['--tables', 'foo'])
@@ -124,7 +124,7 @@ def test_stdin(self):
124124
input_file.close()
125125

126126
def test_stdin_and_filename(self):
127-
input_file = StringIO("a,b,c\n1,2,3\n")
127+
input_file = io.BytesIO(b'a,b,c\n1,2,3\n')
128128

129129
with stdin_as_string(input_file):
130130
sql = self.get_output(['-', 'examples/dummy.csv'])
@@ -135,7 +135,7 @@ def test_stdin_and_filename(self):
135135
input_file.close()
136136

137137
def test_query(self):
138-
input_file = StringIO("a,b,c\n1,2,3\n")
138+
input_file = io.BytesIO(b'a,b,c\n1,2,3\n')
139139

140140
with stdin_as_string(input_file):
141141
sql = self.get_output(['--query', 'SELECT m.usda_id, avg(i.sepal_length) AS mean_sepal_length FROM iris '
@@ -150,7 +150,7 @@ def test_query(self):
150150
input_file.close()
151151

152152
def test_query_empty(self):
153-
input_file = StringIO()
153+
input_file = io.BytesIO()
154154

155155
with stdin_as_string(input_file):
156156
output = self.get_output(['--query', 'SELECT 1'])
@@ -185,14 +185,14 @@ def test_before_after_insert(self):
185185
'SELECT 1; CREATE TABLE foobar (date DATE)', '--after-insert',
186186
'INSERT INTO dummy VALUES (0, 5, 6)'])
187187

188-
output_file = StringIO()
188+
output_file = io.StringIO()
189189
utility = SQL2CSV(['--db', 'sqlite:///' + self.db_file, '--query', 'SELECT * FROM foobar'], output_file)
190190
utility.run()
191191
output = output_file.getvalue()
192192
output_file.close()
193193
self.assertEqual(output, 'date\n')
194194

195-
output_file = StringIO()
195+
output_file = io.StringIO()
196196
utility = SQL2CSV(['--db', 'sqlite:///' + self.db_file, '--query', 'SELECT * FROM dummy'], output_file)
197197
utility.run()
198198
output = output_file.getvalue()

tests/test_utilities/test_csvstack.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def test_skip_lines(self):
2121
])
2222

2323
def test_skip_lines_stdin(self):
24-
with open('examples/test_skip_lines.csv') as f, stdin_as_string(f):
24+
with open('examples/test_skip_lines.csv', 'rb') as f, stdin_as_string(f):
2525
self.assertRows(['--skip-lines', '3', '-', 'examples/test_skip_lines.csv'], [
2626
['a', 'b', 'c'],
2727
['1', '2', '3'],
@@ -62,14 +62,14 @@ def test_multiple_file_stack_col_ragged(self):
6262
])
6363

6464
def test_multiple_file_stack_col_ragged_stdin(self):
65-
with open('examples/dummy.csv') as f, stdin_as_string(f):
65+
with open('examples/dummy.csv', 'rb') as f, stdin_as_string(f):
6666
self.assertRows(['-', 'examples/dummy_col_shuffled_ragged.csv'], [
6767
['a', 'b', 'c', 'd'],
6868
['1', '2', '3', ''],
6969
['1', '2', '3', '4'],
7070
])
7171

72-
with open('examples/dummy.csv') as f, stdin_as_string(f):
72+
with open('examples/dummy.csv', 'rb') as f, stdin_as_string(f):
7373
self.assertRows(['examples/dummy_col_shuffled_ragged.csv', '-'], [
7474
['b', 'c', 'a', 'd'],
7575
['2', '3', '1', '4'],
@@ -101,14 +101,14 @@ def test_no_header_row_basic(self):
101101
])
102102

103103
def test_no_header_row_basic_stdin(self):
104-
with open('examples/no_header_row.csv') as f, stdin_as_string(f):
104+
with open('examples/no_header_row.csv', 'rb') as f, stdin_as_string(f):
105105
self.assertRows(['--no-header-row', '-', 'examples/no_header_row2.csv'], [
106106
['a', 'b', 'c'],
107107
['1', '2', '3'],
108108
['4', '5', '6'],
109109
])
110110

111-
with open('examples/no_header_row.csv') as f, stdin_as_string(f):
111+
with open('examples/no_header_row.csv', 'rb') as f, stdin_as_string(f):
112112
self.assertRows(['--no-header-row', 'examples/no_header_row2.csv', '-'], [
113113
['a', 'b', 'c'],
114114
['4', '5', '6'],

tests/test_utilities/test_in2csv.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1+
import io
12
import os
23
import sys
3-
from io import StringIO
44
from unittest.mock import patch
55

66
from csvkit.utilities.in2csv import In2CSV, launch_new_instance
@@ -38,7 +38,7 @@ def test_blanks(self):
3838
self.assertConverted('csv', 'examples/blanks.csv', 'examples/blanks.csv', ['--blanks'])
3939

4040
def test_null_value(self):
41-
input_file = StringIO('a,b\nn/a,\\N')
41+
input_file = io.BytesIO(b'a,b\nn/a,\\N')
4242

4343
with stdin_as_string(input_file):
4444
self.assertLines(['-f', 'csv', '--null-value', '\\N'], [
@@ -49,7 +49,7 @@ def test_null_value(self):
4949
input_file.close()
5050

5151
def test_null_value_blanks(self):
52-
input_file = StringIO('a,b\nn/a,\\N')
52+
input_file = io.BytesIO(b'a,b\nn/a,\\N')
5353

5454
with stdin_as_string(input_file):
5555
self.assertLines(['-f', 'csv', '--null-value', '\\N', '--blanks'], [
@@ -153,7 +153,7 @@ def test_csv_no_headers_streaming(self):
153153
['--no-header-row', '--no-inference', '--snifflimit', '0'])
154154

155155
def test_csv_datetime_inference(self):
156-
input_file = StringIO('a\n2015-01-01T00:00:00Z')
156+
input_file = io.BytesIO(b'a\n2015-01-01T00:00:00Z')
157157

158158
with stdin_as_string(input_file):
159159
self.assertLines(['-f', 'csv'], [
@@ -182,9 +182,9 @@ def test_xlsx_no_inference(self):
182182
])
183183

184184
def test_geojson_no_inference(self):
185-
input_file = StringIO(
186-
'{"a": 1, "b": 2, "type": "FeatureCollection", "features": [{"geometry": {}, "properties": '
187-
'{"a": 1, "b": 2, "c": 3}}]}')
185+
input_file = io.BytesIO(
186+
b'{"a": 1, "b": 2, "type": "FeatureCollection", "features": [{"geometry": {}, "properties": '
187+
b'{"a": 1, "b": 2, "c": 3}}]}')
188188

189189
with stdin_as_string(input_file):
190190
self.assertLines(['--no-inference', '-f', 'geojson'], [
@@ -195,7 +195,7 @@ def test_geojson_no_inference(self):
195195
input_file.close()
196196

197197
def test_json_no_inference(self):
198-
input_file = StringIO('[{"a": 1, "b": 2, "c": 3}]')
198+
input_file = io.BytesIO(b'[{"a": 1, "b": 2, "c": 3}]')
199199

200200
with stdin_as_string(input_file):
201201
self.assertLines(['--no-inference', '-f', 'json'], [
@@ -206,7 +206,7 @@ def test_json_no_inference(self):
206206
input_file.close()
207207

208208
def test_ndjson_no_inference(self):
209-
input_file = StringIO('{"a": 1, "b": 2, "c": 3}')
209+
input_file = io.BytesIO(b'{"a": 1, "b": 2, "c": 3}')
210210

211211
with stdin_as_string(input_file):
212212
self.assertLines(['--no-inference', '-f', 'ndjson'], [

tests/test_utilities/test_sql2csv.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1+
import io
12
import os
23
import sys
3-
from io import StringIO
44
from unittest.mock import patch
55

66
try:
@@ -71,7 +71,7 @@ def test_file_with_query(self):
7171
self.assertTrue('54' in csv)
7272

7373
def test_stdin(self):
74-
input_file = StringIO('select cast(3.1415 * 13.37 as integer) as answer')
74+
input_file = io.BytesIO(b'select cast(3.1415 * 13.37 as integer) as answer')
7575

7676
with stdin_as_string(input_file):
7777
csv = self.get_output([])
@@ -82,7 +82,7 @@ def test_stdin(self):
8282
input_file.close()
8383

8484
def test_stdin_with_query(self):
85-
input_file = StringIO('select cast(3.1415 * 13.37 as integer) as answer')
85+
input_file = io.BytesIO(b'select cast(3.1415 * 13.37 as integer) as answer')
8686

8787
with stdin_as_string(input_file):
8888
csv = self.get_output(['--query', 'select 6*9 as question'])
@@ -93,7 +93,7 @@ def test_stdin_with_query(self):
9393
input_file.close()
9494

9595
def test_stdin_with_file(self):
96-
input_file = StringIO('select cast(3.1415 * 13.37 as integer) as answer')
96+
input_file = io.BytesIO(b'select cast(3.1415 * 13.37 as integer) as answer')
9797

9898
with stdin_as_string(input_file):
9999
csv = self.get_output(['examples/test.sql'])
@@ -104,7 +104,7 @@ def test_stdin_with_file(self):
104104
input_file.close()
105105

106106
def test_stdin_with_file_and_query(self):
107-
input_file = StringIO('select cast(3.1415 * 13.37 as integer) as answer')
107+
input_file = io.BytesIO(b'select cast(3.1415 * 13.37 as integer) as answer')
108108

109109
with stdin_as_string(input_file):
110110
csv = self.get_output(['examples/test.sql', '--query', 'select 6*9 as question'])

0 commit comments

Comments
 (0)