From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 01/13] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") 
week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From 
7d96cd369c21a056930f746b6ca0893701359773 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 13:45:51 +0100 Subject: [PATCH 02/13] Leading space in to_string when index=False --- pandas/io/formats/format.py | 37 ++++++++++++++---- pandas/tests/io/formats/test_format.py | 48 ++++++++++++++++++++++-- pandas/tests/io/formats/test_to_latex.py | 22 +++++------ 3 files changed, 85 insertions(+), 22 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 41bddc7683764..00c2d60b980f1 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -337,11 +337,17 @@ def _get_formatted_index(self) -> Tuple[List[str], bool]: return fmt_index, have_header def _get_formatted_values(self) -> List[str]: + if self.index: + leading_space = "compat" + else: + leading_space = False + return format_array( self.tr_series._values, None, float_format=self.float_format, na_rep=self.na_rep, + leading_space=leading_space, ) def to_string(self) -> str: @@ -937,6 +943,10 @@ def to_latex( def _format_col(self, i: int) -> List[str]: frame = self.tr_frame formatter = self._get_formatter(i) + if self.index: + leading_space = "compat" + else: + leading_space = False return format_array( frame.iloc[:, i]._values, formatter, @@ -944,6 +954,7 @@ def _format_col(self, i: int) -> List[str]: na_rep=self.na_rep, space=self.col_space, decimal=self.decimal, + leading_space=leading_space, ) def to_html( @@ -1095,7 +1106,7 @@ def format_array( space: Optional[Union[str, int]] = None, justify: str = "right", decimal: str = ".", - leading_space: Optional[bool] = None, + leading_space: bool = "compat", ) -> List[str]: """ Format an array for printing. @@ -1110,7 +1121,7 @@ def format_array( space justify decimal - leading_space : bool, optional + leading_space : bool, default is 'compat' Whether the array should be formatted with a leading space. When an array as a column of a Series or DataFrame, we do want the leading space to pad between columns. 
@@ -1176,7 +1187,7 @@ def __init__( decimal: str = ".", quoting: Optional[int] = None, fixed_width: bool = True, - leading_space: Optional[bool] = None, + leading_space: bool = "compat", ): self.values = values self.digits = digits @@ -1238,7 +1249,7 @@ def _format(x): is_float_type = lib.map_infer(vals, is_float) & notna(vals) leading_space = self.leading_space - if leading_space is None: + if leading_space == "compat": leading_space = is_float_type.any() fmt_values = [] @@ -1377,8 +1388,12 @@ def format_values_with(float_format): # The default is otherwise to use str instead of a formatting string if self.float_format is None: if self.fixed_width: + if self.leading_space is not False: + fmt_str = "{value: .{digits:d}f}" + else: + fmt_str = "{value:.{digits:d}f}" float_format = partial( - "{value: .{digits:d}f}".format, digits=self.digits + fmt_str.format, digits=self.digits ) # type: Optional[float_format_type] else: float_format = self.float_format @@ -1411,7 +1426,11 @@ def format_values_with(float_format): ).any() if has_small_values or (too_long and has_large_values): - float_format = partial("{value: .{digits:d}e}".format, digits=self.digits) + if self.leading_space is not False: + fmt_str = "{value: .{digits:d}e}" + else: + fmt_str = "{value:.{digits:d}e}" + float_format = partial(fmt_str.format, digits=self.digits) formatted_values = format_values_with(float_format) return formatted_values @@ -1426,7 +1445,11 @@ def _format_strings(self) -> List[str]: class IntArrayFormatter(GenericArrayFormatter): def _format_strings(self) -> List[str]: - formatter = self.formatter or (lambda x: "{x: d}".format(x=x)) + if self.leading_space is False: + fmt_str = "{x:d}" + else: + fmt_str = "{x: d}" + formatter = self.formatter or (lambda x: fmt_str.format(x=x)) fmt_values = [formatter(x) for x in self.values] return fmt_values diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 0f4a7a33dd115..f367a15b85760 100644 --- 
a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1503,11 +1503,11 @@ def test_to_string_no_index(self): df_s = df.to_string(index=False) # Leading space is expected for positive numbers. - expected = " x y z\n 11 33 AAA\n 22 -44 " + expected = " x y z\n11 33 AAA\n22 -44 " assert df_s == expected df_s = df[["y", "x", "z"]].to_string(index=False) - expected = " y x z\n 33 11 AAA\n-44 22 " + expected = " y x z\n 33 11 AAA\n-44 22 " assert df_s == expected def test_to_string_line_width_no_index(self): @@ -1522,7 +1522,7 @@ def test_to_string_line_width_no_index(self): df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) df_s = df.to_string(line_width=1, index=False) - expected = " x \\\n 11 \n 22 \n 33 \n\n y \n 4 \n 5 \n 6 " + expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 " assert df_s == expected @@ -2219,7 +2219,7 @@ def test_to_string_without_index(self): # GH 11729 Test index=False option s = Series([1, 2, 3, 4]) result = s.to_string(index=False) - expected = " 1\n" + " 2\n" + " 3\n" + " 4" + expected = "1\n" + "2\n" + "3\n" + "4" assert result == expected def test_unicode_name_in_footer(self): @@ -3272,3 +3272,43 @@ def test_filepath_or_buffer_bad_arg_raises(float_frame, method): msg = "buf is not a file name and it has no write method" with pytest.raises(TypeError, match=msg): getattr(float_frame, method)(buf=object()) + + +@pytest.mark.parametrize( + "inputs, expected", + [([" a", " b"], " a\n b"), ([".1", "1"], ".1\n 1"), (["10", "-10"], " 10\n-10")], +) +def test_to_string_index_false_corner_case(inputs, expected): + s = pd.Series(inputs).to_string(index=False) + assert s == expected + + +@pytest.mark.parametrize( + "input_array, expected", + [ + ("a", "a"), + (["a", "b"], "a\nb"), + ([1, "a"], "1\na"), + (1, "1"), + ([0, -1], " 0\n-1"), + (1.0, "1.0"), + ], +) +def test_format_remove_leading_space_series(input_array, expected): + # GH: 24980 + s = pd.Series(input_array).to_string(index=False) + assert s == 
expected + + +@pytest.mark.parametrize( + "input_array, expected", + [ + ({"A": ["a"]}, "A\na"), + ({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"), + ({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"), + ], +) +def test_format_remove_leading_space_dataframe(input_array, expected): + # GH: 24980 + df = pd.DataFrame(input_array).to_string(index=False) + assert df == expected diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index ea8688517bd93..3b14cf3af902d 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -50,10 +50,10 @@ def test_to_latex(self, float_frame): withoutindex_result = df.to_latex(index=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule - a & b \\ + a & b \\ \midrule - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \bottomrule \end{tabular} """ @@ -413,7 +413,7 @@ def test_to_latex_longtable(self): withoutindex_result = df.to_latex(index=False, longtable=True) withoutindex_expected = r"""\begin{longtable}{rl} \toprule - a & b \\ + a & b \\ \midrule \endhead \midrule @@ -423,8 +423,8 @@ def test_to_latex_longtable(self): \bottomrule \endlastfoot - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \end{longtable} """ @@ -614,8 +614,8 @@ def test_to_latex_no_header(self): withoutindex_result = df.to_latex(index=False, header=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule - 1 & b1 \\ - 2 & b2 \\ +1 & b1 \\ +2 & b2 \\ \bottomrule \end{tabular} """ @@ -641,10 +641,10 @@ def test_to_latex_specified_header(self): withoutindex_result = df.to_latex(header=["AA", "BB"], index=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule -AA & BB \\ +AA & BB \\ \midrule - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \bottomrule \end{tabular} """ From 41bceb9a33b5ca057466d5d810900e91fa76a0c1 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 13:52:21 +0100 Subject: [PATCH 03/13] add whatsnew note --- 
doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index c91ced1014dd1..64bd7ff295a10 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -408,6 +408,7 @@ I/O - Bug in :meth:`Styler.background_gradient` not able to work with dtype ``Int64`` (:issue:`28869`) - Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`) - Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`) +- Bug in :meth:`Series.to_string` adding a leading space when ``index=False`` (:issue:`24980`) - Plotting From 2e46bb4b1cdf41d993f51b6821b20a4eff019455 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 16:00:01 +0100 Subject: [PATCH 04/13] fix typing --- pandas/io/formats/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 00c2d60b980f1..757c1a965c9b1 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1106,7 +1106,7 @@ def format_array( space: Optional[Union[str, int]] = None, justify: str = "right", decimal: str = ".", - leading_space: bool = "compat", + leading_space: Union[bool, str] = "compat", ) -> List[str]: """ Format an array for printing. From fe63d83fbc886d1eb773d1d2a619f10c1d684211 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 16:12:19 +0100 Subject: [PATCH 05/13] change doc --- pandas/io/formats/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 757c1a965c9b1..31ac4c4d9ad93 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1121,7 +1121,7 @@ def format_array( space justify decimal - leading_space : bool, default is 'compat' + leading_space : bool or 'compat', default is 'compat' Whether the array should be formatted with a leading space. 
When an array as a column of a Series or DataFrame, we do want the leading space to pad between columns. From af9d721aa88005a80015e5130eeb2a049fe7fab5 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 16:14:14 +0100 Subject: [PATCH 06/13] combine tests --- pandas/tests/io/formats/test_format.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index f367a15b85760..fe8c80fa18f84 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3274,15 +3274,6 @@ def test_filepath_or_buffer_bad_arg_raises(float_frame, method): getattr(float_frame, method)(buf=object()) -@pytest.mark.parametrize( - "inputs, expected", - [([" a", " b"], " a\n b"), ([".1", "1"], ".1\n 1"), (["10", "-10"], " 10\n-10")], -) -def test_to_string_index_false_corner_case(inputs, expected): - s = pd.Series(inputs).to_string(index=False) - assert s == expected - - @pytest.mark.parametrize( "input_array, expected", [ @@ -3292,6 +3283,9 @@ def test_to_string_index_false_corner_case(inputs, expected): (1, "1"), ([0, -1], " 0\n-1"), (1.0, "1.0"), + ([" a", " b"], " a\n b"), + ([".1", "1"], ".1\n 1"), + (["10", "-10"], " 10\n-10"), ], ) def test_format_remove_leading_space_series(input_array, expected): From b9affd02ddb737597af15149d687673be4b21eb2 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 16:43:34 +0100 Subject: [PATCH 07/13] fix typing hinting --- pandas/io/formats/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 31ac4c4d9ad93..d344b9610dad9 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1187,7 +1187,7 @@ def __init__( decimal: str = ".", quoting: Optional[int] = None, fixed_width: bool = True, - leading_space: bool = "compat", + leading_space: Union[bool, str] = "compat", ): self.values = values self.digits 
= digits From 17941b6b15bfbde94e869e339d478384f05ce72a Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 19:07:11 +0100 Subject: [PATCH 08/13] type hinting --- pandas/io/formats/format.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index d344b9610dad9..72a4e301be28e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1106,7 +1106,7 @@ def format_array( space: Optional[Union[str, int]] = None, justify: str = "right", decimal: str = ".", - leading_space: Union[bool, str] = "compat", + leading_space: Union[str, bool] = "compat", ) -> List[str]: """ Format an array for printing. @@ -1187,7 +1187,7 @@ def __init__( decimal: str = ".", quoting: Optional[int] = None, fixed_width: bool = True, - leading_space: Union[bool, str] = "compat", + leading_space: Union[str, bool] = "compat", ): self.values = values self.digits = digits From e5fa13edd20facf7ffb204106f75fecf95f44b1a Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 20:27:08 +0100 Subject: [PATCH 09/13] remove type hinting --- pandas/io/formats/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 72a4e301be28e..cdd673e4f2a22 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1106,7 +1106,7 @@ def format_array( space: Optional[Union[str, int]] = None, justify: str = "right", decimal: str = ".", - leading_space: Union[str, bool] = "compat", + leading_space="compat", ) -> List[str]: """ Format an array for printing. 
From e02704433ae88f7e2cd9043d094bfe6ef8b9cc6c Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 23 Dec 2019 15:03:35 +0100 Subject: [PATCH 10/13] add type annotation --- pandas/io/formats/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index ab959d6954db0..5d254a2afde26 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1114,7 +1114,7 @@ def format_array( space: Optional[Union[str, int]] = None, justify: str = "right", decimal: str = ".", - leading_space="compat", + leading_space: Union[str, bool] = "compat", ) -> List[str]: """ Format an array for printing. From c63cc824f2cc9526d99678332c13026d78ae91c6 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 23 Dec 2019 15:07:41 +0100 Subject: [PATCH 11/13] remove comment --- pandas/io/formats/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 5d254a2afde26..ce7eeb80627ef 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1406,7 +1406,7 @@ def format_values_with(float_format): fmt_str = "{value:.{digits:d}f}" float_format = partial( fmt_str.format, digits=self.digits - ) # type: Optional[float_format_type] + ) else: float_format = self.float_format else: From 5e6a730d6ed28b77186f9b52e0558b68af323fa8 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 23 Dec 2019 15:08:14 +0100 Subject: [PATCH 12/13] black reformat --- pandas/io/formats/format.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index ce7eeb80627ef..ead1431d259fc 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1404,9 +1404,7 @@ def format_values_with(float_format): fmt_str = "{value: .{digits:d}f}" else: fmt_str = "{value:.{digits:d}f}" - float_format = partial( - fmt_str.format, digits=self.digits - ) + float_format = 
partial(fmt_str.format, digits=self.digits) else: float_format = self.float_format else: From d75b4da63e684bcc50a3016887ecd414439ae7ac Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 23 Dec 2019 15:34:32 +0100 Subject: [PATCH 13/13] fix annotation --- pandas/io/formats/format.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index ead1431d259fc..dc13ba993c951 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -339,6 +339,7 @@ def _get_formatted_index(self) -> Tuple[List[str], bool]: return fmt_index, have_header def _get_formatted_values(self) -> List[str]: + leading_space: Union[bool, str] if self.index: leading_space = "compat" else: @@ -951,6 +952,8 @@ def to_latex( def _format_col(self, i: int) -> List[str]: frame = self.tr_frame formatter = self._get_formatter(i) + + leading_space: Union[bool, str] if self.index: leading_space = "compat" else: