1313
1414import pandas .core .algorithms as algos
1515from pandas .core .arrays import Categorical
16- import pandas .core .common as com
17- from pandas .core .indexes .api import (
18- Index ,
19- MultiIndex ,
20- )
16+ from pandas .core .indexes .api import MultiIndex
2117from pandas .core .reshape .concat import concat
2218from pandas .core .reshape .util import tile_compat
2319from pandas .core .shared_docs import _shared_docs
3127 from pandas import DataFrame
3228
3329
def ensure_list_vars(arg_vars, variable: str, columns) -> list:
    """
    Normalize an ``id_vars``/``value_vars``-style argument into a list.

    Parameters
    ----------
    arg_vars : scalar, list-like or None
        The label or labels supplied by the caller.
    variable : str
        Name of the argument being normalized; only used to build the
        error message.
    columns : Index
        Columns the labels refer to. Only inspected to see whether it is
        a MultiIndex.

    Returns
    -------
    list
        ``[]`` when ``arg_vars`` is None, ``[arg_vars]`` when it is not
        list-like, otherwise ``list(arg_vars)``.

    Raises
    ------
    ValueError
        If ``columns`` is a MultiIndex and ``arg_vars`` is list-like but
        not an actual ``list``.
    """
    # Nothing supplied: callers get an empty list to concatenate with.
    if arg_vars is None:
        return []

    # A single (non list-like) label is wrapped as a one-element list.
    if not is_list_like(arg_vars):
        return [arg_vars]

    # With MultiIndex columns, any list-like that is not a list is rejected
    # (this includes a bare tuple).
    columns_are_multi = isinstance(columns, MultiIndex)
    if columns_are_multi and not isinstance(arg_vars, list):
        raise ValueError(
            f"{variable} must be a list of tuples when columns are a MultiIndex"
        )

    return list(arg_vars)
42+
43+
3444@Appender (_shared_docs ["melt" ] % {"caller" : "pd.melt(df, " , "other" : "DataFrame.melt" })
3545def melt (
3646 frame : DataFrame ,
@@ -41,61 +51,35 @@ def melt(
4151 col_level = None ,
4252 ignore_index : bool = True ,
4353) -> DataFrame :
44- # If multiindex, gather names of columns on all level for checking presence
45- # of `id_vars` and `value_vars`
46- if isinstance (frame .columns , MultiIndex ):
47- cols = [x for c in frame .columns for x in c ]
48- else :
49- cols = list (frame .columns )
50-
5154 if value_name in frame .columns :
5255 raise ValueError (
5356 f"value_name ({ value_name } ) cannot match an element in "
5457 "the DataFrame columns."
5558 )
59+ id_vars = ensure_list_vars (id_vars , "id_vars" , frame .columns )
60+ value_vars_was_not_none = value_vars is not None
61+ value_vars = ensure_list_vars (value_vars , "value_vars" , frame .columns )
5662
57- if id_vars is not None :
58- if not is_list_like (id_vars ):
59- id_vars = [id_vars ]
60- elif isinstance (frame .columns , MultiIndex ) and not isinstance (id_vars , list ):
61- raise ValueError (
62- "id_vars must be a list of tuples when columns are a MultiIndex"
63- )
64- else :
65- # Check that `id_vars` are in frame
66- id_vars = list (id_vars )
67- missing = Index (com .flatten (id_vars )).difference (cols )
68- if not missing .empty :
69- raise KeyError (
70- "The following 'id_vars' are not present "
71- f"in the DataFrame: { list (missing )} "
72- )
73- else :
74- id_vars = []
75-
76- if value_vars is not None :
77- if not is_list_like (value_vars ):
78- value_vars = [value_vars ]
79- elif isinstance (frame .columns , MultiIndex ) and not isinstance (value_vars , list ):
80- raise ValueError (
81- "value_vars must be a list of tuples when columns are a MultiIndex"
82- )
83- else :
84- value_vars = list (value_vars )
85- # Check that `value_vars` are in frame
86- missing = Index (com .flatten (value_vars )).difference (cols )
87- if not missing .empty :
88- raise KeyError (
89- "The following 'value_vars' are not present in "
90- f"the DataFrame: { list (missing )} "
91- )
63+ if id_vars or value_vars :
9264 if col_level is not None :
93- idx = frame .columns .get_level_values (col_level ).get_indexer (
94- id_vars + value_vars
65+ level = frame .columns .get_level_values (col_level )
66+ else :
67+ level = frame .columns
68+ labels = id_vars + value_vars
69+ idx = level .get_indexer_for (labels )
70+ missing = idx == - 1
71+ if missing .any ():
72+ missing_labels = [
73+ lab for lab , not_found in zip (labels , missing ) if not_found
74+ ]
75+ raise KeyError (
76+ "The following id_vars or value_vars are not present in "
77+ f"the DataFrame: { missing_labels } "
9578 )
79+ if value_vars_was_not_none :
80+ frame = frame .iloc [:, algos .unique (idx )]
9681 else :
97- idx = algos .unique (frame .columns .get_indexer_for (id_vars + value_vars ))
98- frame = frame .iloc [:, idx ]
82+ frame = frame .copy ()
9983 else :
10084 frame = frame .copy ()
10185
@@ -113,24 +97,26 @@ def melt(
11397 var_name = [
11498 frame .columns .name if frame .columns .name is not None else "variable"
11599 ]
116- if isinstance (var_name , str ):
100+ elif is_list_like (var_name ):
101+ raise ValueError (f"{ var_name = } must be a scalar." )
102+ else :
117103 var_name = [var_name ]
118104
119- N , K = frame .shape
120- K -= len (id_vars )
105+ num_rows , K = frame .shape
106+ num_cols_adjusted = K - len (id_vars )
121107
122108 mdata : dict [Hashable , AnyArrayLike ] = {}
123109 for col in id_vars :
124110 id_data = frame .pop (col )
125111 if not isinstance (id_data .dtype , np .dtype ):
126112 # i.e. ExtensionDtype
127- if K > 0 :
128- mdata [col ] = concat ([id_data ] * K , ignore_index = True )
113+ if num_cols_adjusted > 0 :
114+ mdata [col ] = concat ([id_data ] * num_cols_adjusted , ignore_index = True )
129115 else :
130116 # We can't concat empty list. (GH 46044)
131117 mdata [col ] = type (id_data )([], name = id_data .name , dtype = id_data .dtype )
132118 else :
133- mdata [col ] = np .tile (id_data ._values , K )
119+ mdata [col ] = np .tile (id_data ._values , num_cols_adjusted )
134120
135121 mcolumns = id_vars + var_name + [value_name ]
136122
@@ -143,12 +129,12 @@ def melt(
143129 else :
144130 mdata [value_name ] = frame ._values .ravel ("F" )
145131 for i , col in enumerate (var_name ):
146- mdata [col ] = frame .columns ._get_level_values (i ).repeat (N )
132+ mdata [col ] = frame .columns ._get_level_values (i ).repeat (num_rows )
147133
148134 result = frame ._constructor (mdata , columns = mcolumns )
149135
150136 if not ignore_index :
151- result .index = tile_compat (frame .index , K )
137+ result .index = tile_compat (frame .index , num_cols_adjusted )
152138
153139 return result
154140
0 commit comments