Skip to content

Commit 70b8f7f

Browse files
committed
feat(csvclean): Add --header-normalize-space option, closes #1056
1 parent 95dc26d commit 70b8f7f

File tree

5 files changed

+20
-3
lines changed

5 files changed

+20
-3
lines changed

CHANGELOG.rst

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
Other changes:
99

10+
* feat: :doc:`/scripts/csvclean` adds a :code:`--header-normalize-space` option to strip leading and trailing whitespace and replace sequences of whitespace characters with a single space in the header.
1011
* feat: The :code:`--quoting` option accepts 4 (`csv.QUOTE_STRINGS <https://docs.python.org/3/library/csv.html#csv.QUOTE_STRINGS>`__) and 5 (`csv.QUOTE_NOTNULL <https://docs.python.org/3/library/csv.html#csv.QUOTE_NOTNULL>`__) on Python 3.12.
1112
* feat: :doc:`/scripts/csvformat`: The :code:`--out-quoting` option accepts 4 (`csv.QUOTE_STRINGS <https://docs.python.org/3/library/csv.html#csv.QUOTE_STRINGS>`__) and 5 (`csv.QUOTE_NOTNULL <https://docs.python.org/3/library/csv.html#csv.QUOTE_NOTNULL>`__) on Python 3.12.
1213
* fix: :doc:`/scripts/csvformat`: The :code:`--out-quoting` option works with 2 (`csv.QUOTE_NONNUMERIC <https://docs.python.org/3/library/csv.html#csv.QUOTE_NONNUMERIC>`__). Use the :code:`--locale` option to set the locale of any formatted numbers.

csvkit/cleanup.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,12 @@ class RowChecker:
2828
Iterate over rows of a CSV producing cleaned rows and storing error rows.
2929
"""
3030

31-
def __init__(self, reader):
31+
def __init__(self, reader, header_normalize_space=False):
3232
self.reader = reader
3333
try:
3434
self.column_names = next(reader)
35+
if header_normalize_space:
36+
self.column_names = [' '.join(column_name.split()) for column_name in self.column_names]
3537
except StopIteration:
3638
self.column_names = []
3739
self.errors = []

csvkit/utilities/csvclean.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,18 @@ class CSVClean(CSVKitUtility):
1313
override_flags = ['L', 'blanks', 'date-format', 'datetime-format']
1414

1515
def add_arguments(self):
16-
pass
16+
self.argparser.add_argument(
17+
'--header-normalize-space', dest='header_normalize_space', action='store_true',
18+
help='Strip leading and trailing whitespace and replace sequences of whitespace characters by a single '
19+
'space in the header.')
1720

1821
def main(self):
1922
if self.additional_input_expected():
2023
sys.stderr.write('No input file or piped data provided. Waiting for standard input:\n')
2124

2225
reader = agate.csv.reader(self.skip_lines(), **self.reader_kwargs)
2326

24-
checker = RowChecker(reader)
27+
checker = RowChecker(reader, header_normalize_space=self.args.header_normalize_space)
2528

2629
output_writer = agate.csv.writer(self.output_file, **self.writer_kwargs)
2730
output_writer.writerow(checker.column_names)

examples/test_header_newline.csv

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"
2+
start
3+
end
4+
",b,c
5+
d,e,f

tests/test_utilities/test_csvclean.py

+6
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,12 @@ def test_no_header_row(self):
7777
['1', '2', '3'],
7878
], [])
7979

80+
def test_header_normalize_space(self):
81+
self.assertCleaned(['--header-normalize-space', 'examples/test_header_newline.csv'], [
82+
['start end', 'b', 'c'],
83+
['d', 'e', 'f'],
84+
], [])
85+
8086
def test_removes_optional_quote_characters(self):
8187
self.assertCleaned(['examples/optional_quote_characters.csv'], [
8288
['a', 'b', 'c'],

0 commit comments

Comments
 (0)