Skip to content

Commit 89a6a01

Browse files
committed
ENH: pd.DataFrame.info() to show line numbers GH17304
1 parent c0e3767 commit 89a6a01

File tree

3 files changed

+34
-23
lines changed

3 files changed

+34
-23
lines changed

doc/source/whatsnew/v0.23.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,3 +429,4 @@ Other
429429

430430
- Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`)
431431
- :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`)
432+
- :meth:`DataFrame.info` now shows line numbers for the columns summary (:issue:`17304`)

pandas/core/frame.py

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1809,50 +1809,59 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
18091809
lines.append(self.index.summary())
18101810

18111811
if len(self.columns) == 0:
1812-
lines.append('Empty %s' % type(self).__name__)
1812+
lines.append('Empty {name}'.format(name=type(self).__name__))
18131813
_put_lines(buf, lines)
18141814
return
18151815

18161816
cols = self.columns
1817+
cols_count = len(cols)
18171818

18181819
# hack
18191820
if max_cols is None:
1820-
max_cols = get_option('display.max_info_columns',
1821-
len(self.columns) + 1)
1821+
max_cols = get_option('display.max_info_columns', cols_count + 1)
18221822

18231823
max_rows = get_option('display.max_info_rows', len(self) + 1)
18241824

18251825
if null_counts is None:
1826-
show_counts = ((len(self.columns) <= max_cols) and
1826+
show_counts = ((cols_count <= max_cols) and
18271827
(len(self) < max_rows))
18281828
else:
18291829
show_counts = null_counts
1830-
exceeds_info_cols = len(self.columns) > max_cols
1830+
exceeds_info_cols = cols_count > max_cols
18311831

18321832
def _verbose_repr():
1833-
lines.append('Data columns (total %d columns):' %
1834-
len(self.columns))
1835-
space = max(len(pprint_thing(k)) for k in self.columns) + 4
1833+
lines.append('Data columns (total '
1834+
'{count} columns):'.format(count=cols_count))
1835+
space = max([len(pprint_thing(k)) for k in cols])
1836+
space = max(space, len(pprint_thing('Column'))) + 4
1837+
space_num = len(pprint_thing(cols_count))
1838+
space_num = max(space_num, len(pprint_thing('Index'))) + 2
18361839
counts = None
18371840

1838-
tmpl = "%s%s"
1841+
header = _put_str('Index', space_num) + _put_str('Column', space)
1842+
tmpl = '{count}{dtype}'
18391843
if show_counts:
18401844
counts = self.count()
18411845
if len(cols) != len(counts): # pragma: no cover
1842-
raise AssertionError('Columns must equal counts (%d != %d)'
1843-
% (len(cols), len(counts)))
1844-
tmpl = "%s non-null %s"
1845-
1846+
raise AssertionError(
1847+
'Columns must equal counts '
1848+
'({cols_count} != {count})'.format(
1849+
cols_count=cols_count, count=len(counts)))
1850+
header += 'Non-Null Count'
1851+
tmpl = '{count} non-null {dtype}'
1852+
1853+
lines.append(header)
18461854
dtypes = self.dtypes
1847-
for i, col in enumerate(self.columns):
1855+
for i, col in enumerate(cols):
18481856
dtype = dtypes.iloc[i]
18491857
col = pprint_thing(col)
1850-
1851-
count = ""
1858+
line_no = _put_str(' {num}'.format(num=i), space_num)
1859+
count = ''
18521860
if show_counts:
18531861
count = counts.iloc[i]
18541862

1855-
lines.append(_put_str(col, space) + tmpl % (count, dtype))
1863+
lines.append(line_no + _put_str(col, space) +
1864+
tmpl.format(count=count, dtype=dtype))
18561865

18571866
def _non_verbose_repr():
18581867
lines.append(self.columns.summary(name='Columns'))

pandas/tests/frame/test_repr_info.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,8 @@ def test_info_duplicate_columns_shows_correct_dtypes(self):
239239
frame.info(buf=io)
240240
io.seek(0)
241241
lines = io.readlines()
242-
assert 'a 1 non-null int64\n' == lines[3]
243-
assert 'a 1 non-null float64\n' == lines[4]
242+
assert ' 0 a 1 non-null int64\n' == lines[4]
243+
assert ' 1 a 1 non-null float64\n' == lines[5]
244244

245245
def test_info_shows_column_dtypes(self):
246246
dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]',
@@ -254,20 +254,21 @@ def test_info_shows_column_dtypes(self):
254254
df.info(buf=buf)
255255
res = buf.getvalue()
256256
for i, dtype in enumerate(dtypes):
257-
name = '%d %d non-null %s' % (i, n, dtype)
257+
name = '%s %d non-null %s' % (i, n, dtype)
258+
258259
assert name in res
259260

260261
def test_info_max_cols(self):
261262
df = DataFrame(np.random.randn(10, 5))
262-
for len_, verbose in [(5, None), (5, False), (10, True)]:
263+
for len_, verbose in [(5, None), (5, False), (11, True)]:
263264
# For verbose always ^ setting ^ summarize ^ full output
264265
with option_context('max_info_columns', 4):
265266
buf = StringIO()
266267
df.info(buf=buf, verbose=verbose)
267268
res = buf.getvalue()
268269
assert len(res.strip().split('\n')) == len_
269270

270-
for len_, verbose in [(10, None), (5, False), (10, True)]:
271+
for len_, verbose in [(11, None), (5, False), (11, True)]:
271272

272273
# max_cols not exceeded
273274
with option_context('max_info_columns', 5):
@@ -276,7 +277,7 @@ def test_info_max_cols(self):
276277
res = buf.getvalue()
277278
assert len(res.strip().split('\n')) == len_
278279

279-
for len_, max_cols in [(10, 5), (5, 4)]:
280+
for len_, max_cols in [(11, 5), (5, 4)]:
280281
# setting truncates
281282
with option_context('max_info_columns', 4):
282283
buf = StringIO()

0 commit comments

Comments
 (0)