Dear jlevy44,
I wanted to use the InteractionTransformer in combination with the XGBClassifier. Following your demo on GitHub, I ran:
from xgboost import XGBClassifier
transformer=InteractionTransformer(untrained_model=XGBClassifier(random_state=42, tree_method='hist'),max_train_test_samples=1000,mode_interaction_extract=int(np.sqrt(X_train.shape[1])))
transformer.fit(X_train,y_train)
Here, X_train and y_train are DataFrames with shapes (700000, 39) and (700000, 1), respectively.
I get the following error:
---------------------------------------------------------------------------------
ValueError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in create_block_manager_from_blocks(blocks, axes)
1661 blocks = [
-> 1662 make_block(values=blocks[0], placement=slice(0, len(axes[0])))
1663 ]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in make_block(values, placement, klass, ndim, dtype)
2721
-> 2722 return klass(values, ndim=ndim, placement=placement)
2723
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in __init__(self, values, placement, ndim)
129 if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values):
--> 130 raise ValueError(
131 f"Wrong number of items passed {len(self.values)}, "
ValueError: Wrong number of items passed 1, placement implies 39
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
in
1 from xgboost import XGBClassifier
2 transformer=InteractionTransformer(untrained_model=XGBClassifier(random_state=42),max_train_test_samples=1000,mode_interaction_extract=int(np.sqrt(X_train.shape[1]))) # mode_interaction_extract='sqrt'
----> 3 transformer.fit(X_train,y_train)
~\InteractionTransformer.py in fit(self, X, y)
204 # import pickle
205 # pickle.dump(shap_vals,open('shap_test.pkl','wb'))
--> 206 true_top_interactions=self.get_top_interactions(shap_vals)
207 #print(true_top_interactions)
208 self.design_terms='+'.join((np.core.defchararray.add(np.vectorize(lambda x: "Q('{}')*".format(x))(true_top_interactions.iloc[:,0]),np.vectorize(lambda x: "Q('{}')".format(x))(true_top_interactions.iloc[:,1]))).tolist())
~\InteractionTransformer.py in get_top_interactions(self, shap_vals)
223
224 """
--> 225 interaction_matrix=pd.DataFrame(shap_vals.mean(0),columns=self.features,index=self.features)#reduce(lambda x,y:x+y,shap_vals)/len(shap_vals)
226 interation_matrix_self_interact_removed=interaction_matrix.copy()
227 if not self.self_interactions:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
495 mgr = init_dict({data.name: data}, index, columns, dtype=dtype)
496 else:
--> 497 mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
498
499 # For data is list-like, or Iterable (will consume into list)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\construction.py in init_ndarray(values, index, columns, dtype, copy)
232 block_values = [values]
233
--> 234 return create_block_manager_from_blocks(block_values, [columns, index])
235
236
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in create_block_manager_from_blocks(blocks, axes)
1670 blocks = [getattr(b, "values", b) for b in blocks]
1671 tot_items = sum(b.shape[0] for b in blocks)
-> 1672 raise construction_error(tot_items, blocks[0].shape[1:], axes, e)
1673
1674
ValueError: Shape of passed values is (39, 1), indices imply (39, 39)
---------------------------------------------------------------------------------
I then tried it with the data provided in your demo, and everything worked fine. Do you know what could be going wrong with my data?
Thanks in advance,
Hassan
Dear jlevy44,
I wanted to use the InteractionTransformer in combination with the XGBClassifier. Following your demo on GitHub, I ran:
from xgboost import XGBClassifier
transformer=InteractionTransformer(untrained_model=XGBClassifier(random_state=42, tree_method='hist'),max_train_test_samples=1000,mode_interaction_extract=int(np.sqrt(X_train.shape[1])))
transformer.fit(X_train,y_train)
Here, X_train and y_train are DataFrames with shapes (700000, 39) and (700000, 1), respectively.
I get the following error:
---------------------------------------------------------------------------------
ValueError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in create_block_manager_from_blocks(blocks, axes)
1661 blocks = [
-> 1662 make_block(values=blocks[0], placement=slice(0, len(axes[0])))
1663 ]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in make_block(values, placement, klass, ndim, dtype)
2721
-> 2722 return klass(values, ndim=ndim, placement=placement)
2723
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in __init__(self, values, placement, ndim)
129 if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values):
--> 130 raise ValueError(
131 f"Wrong number of items passed {len(self.values)}, "
ValueError: Wrong number of items passed 1, placement implies 39
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
in
1 from xgboost import XGBClassifier
2 transformer=InteractionTransformer(untrained_model=XGBClassifier(random_state=42),max_train_test_samples=1000,mode_interaction_extract=int(np.sqrt(X_train.shape[1]))) # mode_interaction_extract='sqrt'
----> 3 transformer.fit(X_train,y_train)
~\InteractionTransformer.py in fit(self, X, y)
204 # import pickle
205 # pickle.dump(shap_vals,open('shap_test.pkl','wb'))
--> 206 true_top_interactions=self.get_top_interactions(shap_vals)
207 #print(true_top_interactions)
208 self.design_terms='+'.join((np.core.defchararray.add(np.vectorize(lambda x: "Q('{}')*".format(x))(true_top_interactions.iloc[:,0]),np.vectorize(lambda x: "Q('{}')".format(x))(true_top_interactions.iloc[:,1]))).tolist())
~\InteractionTransformer.py in get_top_interactions(self, shap_vals)
223
224 """
--> 225 interaction_matrix=pd.DataFrame(shap_vals.mean(0),columns=self.features,index=self.features)#reduce(lambda x,y:x+y,shap_vals)/len(shap_vals)
226 interation_matrix_self_interact_removed=interaction_matrix.copy()
227 if not self.self_interactions:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
495 mgr = init_dict({data.name: data}, index, columns, dtype=dtype)
496 else:
--> 497 mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
498
499 # For data is list-like, or Iterable (will consume into list)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\construction.py in init_ndarray(values, index, columns, dtype, copy)
232 block_values = [values]
233
--> 234 return create_block_manager_from_blocks(block_values, [columns, index])
235
236
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in create_block_manager_from_blocks(blocks, axes)
1670 blocks = [getattr(b, "values", b) for b in blocks]
1671 tot_items = sum(b.shape[0] for b in blocks)
-> 1672 raise construction_error(tot_items, blocks[0].shape[1:], axes, e)
1673
1674
ValueError: Shape of passed values is (39, 1), indices imply (39, 39)
---------------------------------------------------------------------------------
I then tried it with the data provided in your demo, and everything worked fine. Do you know what could be going wrong with my data?
Thanks in advance,
Hassan