File tree Expand file tree Collapse file tree 3 files changed +16
-3
lines changed
Expand file tree Collapse file tree 3 files changed +16
-3
lines changed Original file line number Diff line number Diff line change @@ -64,6 +64,7 @@ Bug Fixes
6464- Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178` :issue:`18187`)
6565- Bug in :class:`IntervalIndex` constructor when a list of intervals is passed with non-default ``closed`` (:issue:`18334`)
6666- Bug in :meth:`IntervalIndex.copy` when copying an ``IntervalIndex`` with non-default ``closed`` (:issue:`18339`)
67+ - Bug in ``pd.read_csv`` when reading numeric category fields with high cardinality (:issue:`18186`)
6768
6869Conversion
6970^^^^^^^^^^
Original file line number Diff line number Diff line change @@ -2227,9 +2227,10 @@ def _concatenate_chunks(list chunks):
22272227 for name in names:
22282228 arrs = [chunk.pop(name) for chunk in chunks]
22292229 # Check each arr for consistent types.
2230- dtypes = set (a.dtype for a in arrs)
2231- if len (dtypes) > 1 :
2232- common_type = np.find_common_type(dtypes, [])
2230+ dtypes = set ([a.dtype for a in arrs])
2231+ numpy_dtypes = {x for x in dtypes if not is_categorical_dtype(x)}
2232+ if len (numpy_dtypes) > 1 :
2233+ common_type = np.find_common_type(numpy_dtypes, [])
22332234 if common_type == np.object:
22342235 warning_columns.append(str (name))
22352236
Original file line number Diff line number Diff line change @@ -114,6 +114,17 @@ def test_categorical_dtype(self):
114114 actual = self .read_csv (StringIO (data ), dtype = 'category' )
115115 tm .assert_frame_equal (actual , expected )
116116
117+ @pytest .mark .slow
118+ def test_categorical_dtype_high_cardinality_numeric (self ):
119+ # GH 18186
120+ data = sorted ([str (i ) for i in range (10 ** 6 )])
121+ expected = pd .DataFrame ({'a' : Categorical (data , ordered = True )})
122+ actual = self .read_csv (StringIO ('a\n ' + '\n ' .join (data )),
123+ dtype = 'category' )
124+ actual .a .cat .reorder_categories (sorted (actual .a .cat .categories ),
125+ ordered = True , inplace = True )
126+ tm .assert_frame_equal (actual , expected )
127+
117128 def test_categorical_dtype_encoding (self ):
118129 # GH 10153
119130 pth = tm .get_data_path ('unicode_series.csv' )
You can’t perform that action at this time.
0 commit comments